package com.miguelfonseca.completely.text.analyze.transform;

import com.miguelfonseca.completely.text.analyze.Analyzer;

import java.text.Normalizer;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;

import static com.miguelfonseca.completely.common.Precondition.checkPointer;

/**
 * Strip text diacritics.
 *
 * <p>Each input string is decomposed with Unicode normalization form NFD, after which every
 * code point whose general category is {@link Character#NON_SPACING_MARK} is dropped. Code
 * points of any other category, including spacing and enclosing marks, are kept as they are.
 */
public class DiacriticsTransformer extends Analyzer
{
    /**
     * Removes non-spacing combining marks from every string in {@code input}.
     *
     * <p>Both the collection and each of its elements are validated with {@code checkPointer};
     * a {@code null} in either position is rejected by that precondition (presumably with an
     * exception -- the exact type is defined by {@code Precondition}, not visible here).
     *
     * @param input the strings to transform; must not be {@code null} nor contain {@code null}
     * @return a new list holding one transformed string per input element, in the iteration
     *         order of {@code input}
     */
    @Override
    public Collection<String> apply(Collection<String> input)
    {
        checkPointer(input != null);

        List<String> result = new LinkedList<>();
        for (String text : input)
        {
            checkPointer(text != null);

            // NFD splits precomposed characters (e.g. U+00E9) into a base character followed
            // by its combining marks, so the marks can be filtered out individually.
            String canonical = Normalizer.normalize(text, Normalizer.Form.NFD);
            StringBuilder builder = new StringBuilder(canonical.length());

            // Walk by code point rather than by UTF-16 code unit: a combining mark outside the
            // Basic Multilingual Plane is stored as a surrogate pair, and each half of the pair
            // would be classified as SURROGATE (never NON_SPACING_MARK) if inspected alone.
            int index = 0;
            while (index < canonical.length())
            {
                int codePoint = canonical.codePointAt(index);
                if (Character.getType(codePoint) != Character.NON_SPACING_MARK)
                {
                    builder.appendCodePoint(codePoint);
                }
                index += Character.charCount(codePoint);
            }

            result.add(builder.toString());
        }

        return result;
    }
}